{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = '123'\n",
    "a == '123'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<_sre.SRE_Match object; span=(0, 1), match='1'>"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import re\n",
    "a = '1'\n",
    "#re.search('1', a)\n",
    "#re.search('[0123456789]', a)\n",
    "#re.search('[0-9]', a) # [0-9] = [0123456789]\n",
    "re.search('\\d', a) #\\d = [0-9]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<_sre.SRE_Match object; span=(0, 1), match='3'>"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = 'a'\n",
    "re.search('a', a)\n",
    "re.search('[abcdefghijklmnopqrstuvwxyz]', a)\n",
    "re.search('[a-z]', a)\n",
    "b = 'A'\n",
    "#re.search('[a-z]', b)\n",
    "re.search('[a-zA-Z]', b)\n",
    "c = '3'\n",
    "re.search('[a-zA-Z0-9]', c)\n",
    "re.search('\\w', c) # \\w = [a-zA-Z0-9] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<_sre.SRE_Match object; span=(0, 7), match='abcdefg'>"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = 'abc'\n",
    "re.search('\\w', a)\n",
    "re.search('\\w{2}', a)\n",
    "re.search('\\w{3}', a)\n",
    "\n",
    "re.search('\\w{1,3}', a)\n",
    "\n",
    "b = 'abcdefg'\n",
    "re.search('\\w{1,3}', b)\n",
    "re.search('\\w+', b) #\\w+ = \\w{1, inf}\n",
    "re.search('\\w*', b) #\\w* = \\w{0, inf}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<_sre.SRE_Match object; span=(12, 23), match='13412341234'>"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = 'my phone is 134-1234-1234'\n",
    "re.search('\\d{3}-\\d{4}-\\d{4}', a)\n",
    "\n",
    "b = 'my phone is 13412341234'\n",
    "re.search('\\d{3}\\d{4}\\d{4}', b)\n",
    "\n",
    "re.search('\\d{3}-{0,1}\\d{4}-{0,1}\\d{4}', a)\n",
    "re.search('\\d{3}-{0,1}\\d{4}-{0,1}\\d{4}', b)\n",
    "\n",
    "re.search('\\d{3}-?\\d{4}-?\\d{4}', a) # ? = {0,1}\n",
    "re.search('\\d{3}-?\\d{4}-?\\d{4}', b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('david', 'largitdata')"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import re\n",
    "email = 'david@largitdata.com'\n",
    "re.match('\\w+@\\w+', email)\n",
    "m = re.match('(\\w+)@(\\w+)', email)\n",
    "m.group(1)\n",
    "m.group(2)\n",
    "m.groups()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('david', 'largitdata.com')"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "m = re.match('(\\w+)@([a-z\\.]+)', email)\n",
    "m.groups()\n",
    "\n",
    "m = re.match('(\\w+)@(.+)', email)\n",
    "m.groups()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('1999', '5')"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "digit = '1999.5'\n",
    "m = re.match('(\\d+)\\.(\\d+)', digit)\n",
    "m.groups()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Chiu'"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "name = 'David Chiu'\n",
    "m = re.match('(\\w+) (\\w+)', name)\n",
    "m.group(1)\n",
    "m.group(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Chiu'"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "m = re.match('(?P<first_name>\\w+) (?P<last_name>\\w+)', name)\n",
    "m.group('first_name')\n",
    "m.group('last_name')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "file.txt root 10.0.0.1 ./\n"
     ]
    }
   ],
   "source": [
    "str1 = 'scp file.txt root@10.0.0.1:./'\n",
    "m = re.search('^scp ([\\w\\.]+) (\\w+)@([\\w\\.]+):(.+)', str1)\n",
    "if m:\n",
    "    print(m.group(1), m.group(2), m.group(3), m.group(4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>标题</th>\n",
       "      <th>建筑面积</th>\n",
       "      <th>总价</th>\n",
       "      <th>户型</th>\n",
       "      <th>朝向</th>\n",
       "      <th>物 业 费</th>\n",
       "      <th>环比上月</th>\n",
       "      <th>张贴日期</th>\n",
       "      <th>产权性质</th>\n",
       "      <th>装修</th>\n",
       "      <th>楼层</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>资深经纪上海人：稀缺3房，南北通透采光好，房东急需用钱！</td>\n",
       "      <td>123.50</td>\n",
       "      <td>760</td>\n",
       "      <td>3室2厅0厨2卫</td>\n",
       "      <td>南北</td>\n",
       "      <td>1.5元/平米・月</td>\n",
       "      <td>↑1.44%</td>\n",
       "      <td>西元2017年4月12日</td>\n",
       "      <td>商品房</td>\n",
       "      <td>豪华装修</td>\n",
       "      <td>低层</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>未来规划小上海，精装带地暖，养老，亲子，旅游度假一体化</td>\n",
       "      <td>86.00</td>\n",
       "      <td>76</td>\n",
       "      <td>2室2厅1厨1卫</td>\n",
       "      <td>南北</td>\n",
       "      <td>1.8元/・月</td>\n",
       "      <td>↑4.08%</td>\n",
       "      <td>西元2017年4月13日</td>\n",
       "      <td>商品房</td>\n",
       "      <td>精装修</td>\n",
       "      <td>中层</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>双高铁 不限贷 买一层送一层 送大露台 看房专车接送</td>\n",
       "      <td>90.00</td>\n",
       "      <td>80</td>\n",
       "      <td>3室2厅1厨2卫</td>\n",
       "      <td>南北</td>\n",
       "      <td>暂无资料</td>\n",
       "      <td>↓0.01%</td>\n",
       "      <td>西元2017年4月14日</td>\n",
       "      <td>商品房</td>\n",
       "      <td>精装修</td>\n",
       "      <td>高层</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>坐拥嘉兴海宁双轨 享受市区配套 现房值得拥有 可落户</td>\n",
       "      <td>90.00</td>\n",
       "      <td>80</td>\n",
       "      <td>2室1厅1厨1卫</td>\n",
       "      <td>南北</td>\n",
       "      <td>暂无资料</td>\n",
       "      <td>↓0.01%</td>\n",
       "      <td>西元2017年4月15日</td>\n",
       "      <td>商品房</td>\n",
       "      <td>简装修</td>\n",
       "      <td>中层</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>对口明强校区，精装两房税费少，位置安静，媲美园艺新村，随时看</td>\n",
       "      <td>66.61</td>\n",
       "      <td>285</td>\n",
       "      <td>2室1厅1厨1卫</td>\n",
       "      <td>南</td>\n",
       "      <td>0.8元/平米・月</td>\n",
       "      <td>↓2.62%</td>\n",
       "      <td>西元2017年4月16日</td>\n",
       "      <td>个人产权</td>\n",
       "      <td>中装修</td>\n",
       "      <td>低层</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                               标题    建筑面积   总价        户型  朝向      物 业 费  \\\n",
       "0    资深经纪上海人：稀缺3房，南北通透采光好，房东急需用钱！  123.50  760  3室2厅0厨2卫  南北  1.5元/平米・月   \n",
       "1     未来规划小上海，精装带地暖，养老，亲子，旅游度假一体化   86.00   76  2室2厅1厨1卫  南北    1.8元/・月   \n",
       "2      双高铁 不限贷 买一层送一层 送大露台 看房专车接送   90.00   80  3室2厅1厨2卫  南北       暂无资料   \n",
       "3      坐拥嘉兴海宁双轨 享受市区配套 现房值得拥有 可落户   90.00   80  2室1厅1厨1卫  南北       暂无资料   \n",
       "4  对口明强校区，精装两房税费少，位置安静，媲美园艺新村，随时看   66.61  285  2室1厅1厨1卫   南  0.8元/平米・月   \n",
       "\n",
       "     环比上月          张贴日期  产权性质    装修  楼层  \n",
       "0  ↑1.44%  西元2017年4月12日   商品房  豪华装修  低层  \n",
       "1  ↑4.08%  西元2017年4月13日   商品房   精装修  中层  \n",
       "2  ↓0.01%  西元2017年4月14日   商品房   精装修  高层  \n",
       "3  ↓0.01%  西元2017年4月15日   商品房   简装修  中层  \n",
       "4  ↓2.62%  西元2017年4月16日  个人产权   中装修  低层  "
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas\n",
    "df = pandas.read_excel('data/house_sample.xlsx')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df[['室','厅','厨','卫']] = df['户型'].str.extract('(\\d+)室(\\d+)厅(\\d+)厨(\\d+)卫', expand=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>标题</th>\n",
       "      <th>建筑面积</th>\n",
       "      <th>总价</th>\n",
       "      <th>户型</th>\n",
       "      <th>朝向</th>\n",
       "      <th>物 业 费</th>\n",
       "      <th>环比上月</th>\n",
       "      <th>张贴日期</th>\n",
       "      <th>产权性质</th>\n",
       "      <th>装修</th>\n",
       "      <th>楼层</th>\n",
       "      <th>室</th>\n",
       "      <th>厅</th>\n",
       "      <th>厨</th>\n",
       "      <th>卫</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>资深经纪上海人：稀缺3房，南北通透采光好，房东急需用钱！</td>\n",
       "      <td>123.50</td>\n",
       "      <td>760</td>\n",
       "      <td>3室2厅0厨2卫</td>\n",
       "      <td>南北</td>\n",
       "      <td>1.5元/平米・月</td>\n",
       "      <td>↑1.44%</td>\n",
       "      <td>西元2017年4月12日</td>\n",
       "      <td>商品房</td>\n",
       "      <td>豪华装修</td>\n",
       "      <td>低层</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>未来规划小上海，精装带地暖，养老，亲子，旅游度假一体化</td>\n",
       "      <td>86.00</td>\n",
       "      <td>76</td>\n",
       "      <td>2室2厅1厨1卫</td>\n",
       "      <td>南北</td>\n",
       "      <td>1.8元/・月</td>\n",
       "      <td>↑4.08%</td>\n",
       "      <td>西元2017年4月13日</td>\n",
       "      <td>商品房</td>\n",
       "      <td>精装修</td>\n",
       "      <td>中层</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>双高铁 不限贷 买一层送一层 送大露台 看房专车接送</td>\n",
       "      <td>90.00</td>\n",
       "      <td>80</td>\n",
       "      <td>3室2厅1厨2卫</td>\n",
       "      <td>南北</td>\n",
       "      <td>暂无资料</td>\n",
       "      <td>↓0.01%</td>\n",
       "      <td>西元2017年4月14日</td>\n",
       "      <td>商品房</td>\n",
       "      <td>精装修</td>\n",
       "      <td>高层</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>坐拥嘉兴海宁双轨 享受市区配套 现房值得拥有 可落户</td>\n",
       "      <td>90.00</td>\n",
       "      <td>80</td>\n",
       "      <td>2室1厅1厨1卫</td>\n",
       "      <td>南北</td>\n",
       "      <td>暂无资料</td>\n",
       "      <td>↓0.01%</td>\n",
       "      <td>西元2017年4月15日</td>\n",
       "      <td>商品房</td>\n",
       "      <td>简装修</td>\n",
       "      <td>中层</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>对口明强校区，精装两房税费少，位置安静，媲美园艺新村，随时看</td>\n",
       "      <td>66.61</td>\n",
       "      <td>285</td>\n",
       "      <td>2室1厅1厨1卫</td>\n",
       "      <td>南</td>\n",
       "      <td>0.8元/平米・月</td>\n",
       "      <td>↓2.62%</td>\n",
       "      <td>西元2017年4月16日</td>\n",
       "      <td>个人产权</td>\n",
       "      <td>中装修</td>\n",
       "      <td>低层</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                               标题    建筑面积   总价        户型  朝向      物 业 费  \\\n",
       "0    资深经纪上海人：稀缺3房，南北通透采光好，房东急需用钱！  123.50  760  3室2厅0厨2卫  南北  1.5元/平米・月   \n",
       "1     未来规划小上海，精装带地暖，养老，亲子，旅游度假一体化   86.00   76  2室2厅1厨1卫  南北    1.8元/・月   \n",
       "2      双高铁 不限贷 买一层送一层 送大露台 看房专车接送   90.00   80  3室2厅1厨2卫  南北       暂无资料   \n",
       "3      坐拥嘉兴海宁双轨 享受市区配套 现房值得拥有 可落户   90.00   80  2室1厅1厨1卫  南北       暂无资料   \n",
       "4  对口明强校区，精装两房税费少，位置安静，媲美园艺新村，随时看   66.61  285  2室1厅1厨1卫   南  0.8元/平米・月   \n",
       "\n",
       "     环比上月          张贴日期  产权性质    装修  楼层  室  厅  厨  卫  \n",
       "0  ↑1.44%  西元2017年4月12日   商品房  豪华装修  低层  3  2  0  2  \n",
       "1  ↑4.08%  西元2017年4月13日   商品房   精装修  中层  2  2  1  1  \n",
       "2  ↓0.01%  西元2017年4月14日   商品房   精装修  高层  3  2  1  2  \n",
       "3  ↓0.01%  西元2017年4月15日   商品房   简装修  中层  2  1  1  1  \n",
       "4  ↓2.62%  西元2017年4月16日  个人产权   中装修  低层  2  1  1  1  "
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
